library("ggplot2")
library("forecast")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("jsonlite")
library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v purrr 0.3.4
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
library("corrplot")
## corrplot 0.92 loaded
library("fastDummies")
library("glmnet")
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-4
library("MASS")
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
df_produccion <- read.csv("data/produccion.csv")
df_compras <- read.csv("data/compras.csv")
str(df_produccion)
## 'data.frame': 6366 obs. of 4 variables:
## $ cantidad : num 6.824986 0.07 0.000014 0.014 0.021 ...
## $ fecha_pedido: chr "2012-05-14" "2012-05-14" "2012-05-14" "2012-05-14" ...
## $ exceso : num 8.12e-02 8.33e-04 1.67e-07 1.67e-04 2.50e-04 ...
## $ material : int 1012887 1012695 1012766 1012512 1012513 1012515 1012887 1012695 1012766 1012512 ...
str(df_compras)
## 'data.frame': 2002 obs. of 5 variables:
## $ material : int 1012510 1012510 1012510 1012510 1012510 1012510 1012510 1012510 1012510 1012510 ...
## $ cantidades : num 2 2 2 2 2 2 2 2 2 2 ...
## $ precio_tonelada: num 223 227 225 223 225 ...
## $ fecha_precio : chr "2012-08-06" "2012-05-21" "2012-07-02" "2012-07-30" ...
## $ importe : num 446 454 449 447 450 ...
comprobación de compras
df_produccion$fecha_pedido <- as.Date(df_produccion$fecha_pedido, format = "%Y-%m-%d")
df_produccion$material <- as.factor(df_produccion$material)
df_compras$fecha_precio <- as.Date(df_compras$fecha_precio, format = "%Y-%m-%d")
df_compras$material <- as.factor(df_compras$material)
Comenzamos con los modelos lineales
library(modeltime)
library(tidymodels)
## -- Attaching packages -------------------------------------- tidymodels 0.2.0 --
## v broom 0.8.0 v rsample 0.1.1
## v dials 0.1.1 v tune 0.2.0
## v infer 1.0.0 v workflows 0.2.6
## v modeldata 0.1.1 v workflowsets 0.2.1
## v parsnip 0.2.1 v yardstick 0.0.9
## v recipes 0.2.0
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x yardstick::accuracy() masks forecast::accuracy()
## x scales::discard() masks purrr::discard()
## x Matrix::expand() masks tidyr::expand()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
## x Matrix::pack() masks tidyr::pack()
## x MASS::select() masks dplyr::select()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
## x Matrix::unpack() masks tidyr::unpack()
## x recipes::update() masks Matrix::update(), stats::update()
## * Use suppressPackageStartupMessages() to eliminate package startup messages
library(tidyverse)
library(timetk)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:yardstick':
##
## precision, recall, sensitivity, specificity
## The following object is masked from 'package:purrr':
##
## lift
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
for (lev in levels(df_produccion$material)){
title_p = paste("Cantidad de material:", lev, "por fecha")
plt <- df_produccion[df_produccion$material==lev,] %>% plot_time_series(fecha_pedido, cantidad, .title = title_p)
print(plt)
}
pred_list_prod <- htmltools::tagList()
test_list_prod <- htmltools::tagList()
for (lev in levels(df_produccion$material)){
df_produccion_train <-df_produccion[df_produccion$material==lev,]
if(dim(df_produccion_train)[1] != 0 & nrow(df_produccion_train) >=9){
splits <- time_series_split(
data = df_produccion_train,
assess = "1 month",
cumulative = TRUE)
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(fecha_pedido, cantidad)
# model prophet
model_prophet <- prophet_reg(
seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(cantidad ~ material + fecha_pedido, training(splits))
model_prophet
# machine learning - GLM
model_glmnet <- linear_reg(penalty = 0.01) %>%
set_engine("glmnet") %>%
fit(
cantidad ~ wday(fecha_pedido, label=TRUE)
+ month(fecha_pedido, label=TRUE)
+ as.numeric(fecha_pedido),
training(splits)
)
model_glmnet
# modeltime compare
model_tbl <- modeltime_table(
model_prophet,
model_glmnet
)
# calibrate
calib_tbl <- model_tbl %>%
modeltime_calibrate(testing(splits))
# Accuracy
calib_tbl %>% modeltime_accuracy()
title <- paste("Predicción de cantidad para el material:",lev, sep=" ")
title_test <- paste("Visualizacion de test para el material:",lev, sep=" ")
# Test Set Visualization
test_list_prod[[lev]] <- calib_tbl %>%
modeltime_forecast(
new_data = testing(splits),
actual_data = df_produccion_train) %>%
plot_modeltime_forecast(.title = title_test, .interactive = TRUE)
print(test_list_prod[[lev]])
# Forecast Future
future_forecast_tbl <- calib_tbl %>%
modeltime_refit(df_produccion_train) %>%
modeltime_forecast(
actual_data = df_produccion_train
)
pred_list_prod[[lev]] <- future_forecast_tbl %>%
plot_modeltime_forecast(.title = title, .interactive = TRUE)
print(pred_list_prod[[lev]])
}
}
htmltools::tagList(pred_list_prod)
htmltools::tagList(test_list_prod)
df_compras %>% plot_time_series(fecha_precio, precio_tonelada)
pred_list_compras <- htmltools::tagList()
test_list_compras <- htmltools::tagList()
for (lev in levels(df_compras$material)){
df_compras_train <-df_compras[df_compras$material==lev,]
if(dim(df_compras_train)[1] != 0){
splits <- time_series_split(
data = df_compras_train,
assess = "1 month",
cumulative = TRUE)
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(fecha_precio, precio_tonelada)
# model prophet
model_prophet <- prophet_reg(
seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(precio_tonelada ~ fecha_precio, training(splits))
model_prophet
# machine learning - GLM
model_glmnet <- linear_reg(penalty = 0.01) %>%
set_engine("glmnet") %>%
fit(
precio_tonelada ~ wday(fecha_precio, label=TRUE)
+ month(fecha_precio, label=TRUE)
+ as.numeric(fecha_precio),
training(splits)
)
model_glmnet
# modeltime compare
model_tbl <- modeltime_table(
model_prophet,
model_glmnet
)
# calibrate
calib_tbl <- model_tbl %>%
modeltime_calibrate(testing(splits))
# Accuracy
calib_tbl %>% modeltime_accuracy()
title <- paste("Predicción de precio para el material:",lev, sep=" ")
# Test Set Visualization
test_list_compras[[lev]] <- calib_tbl %>%
modeltime_forecast(
new_data = testing(splits),
actual_data = df_compras_train) %>%
plot_modeltime_forecast(.title = title, .interactive = TRUE
)
# Forecast Future
future_forecast_tbl <- calib_tbl %>%
modeltime_refit(df_compras_train) %>%
modeltime_forecast(
actual_data = df_compras_train
)
pred_list_compras[[lev]] <- future_forecast_tbl %>%
plot_modeltime_forecast(.title = title, .interactive = TRUE)
print(pred_list_compras[[lev]])
}
}
htmltools::tagList(pred_list_compras)
htmltools::tagList(test_list_compras)